The following data is a subset of the NYC taxi dataset that contains only the For-Hire Vehicle (“FHV”) trip records.
For more information, please refer to the original website.
In [ ]:
import pandas as pd
# Template for the monthly FHV trip-record CSV; filled in with ``year`` and a
# zero-padded two-digit ``month`` via ``str.format``.
URL = "https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_{year}-{month:02}.csv"
# Name of the CSV column that is loaded, parsed as dates and used to bin pickups.
COLUMN = "Pickup_date"
def get_monthly_values(year, month, freq):
    """Count pickups per time bin for a single month of trip records.

    The CSV for ``year``/``month`` is located by filling in the module-level
    ``URL`` template. Only the ``COLUMN`` column is read, and it is parsed
    as dates.

    Parameters
    ----------
    year : value substituted for ``{year}`` in ``URL``.
    month : value substituted for ``{month:02}`` in ``URL``.
    freq : bin width, passed to ``pd.Grouper`` as ``freq``.

    Returns
    -------
    pandas.Series
        Number of rows per ``freq`` bin of ``COLUMN``.
    """
    source = URL.format(year=year, month=month)
    print("Loading {}".format(source))

    # Restrict the read to the one column needed for counting.
    pickups = pd.read_csv(source, usecols=[COLUMN], parse_dates=[COLUMN])

    # Report the in-memory footprint, scaled from bytes by 2**20.
    scale = 2**20
    footprint = pickups.memory_usage(deep=True).sum() / scale
    print("Finished loading. Total size: {:.2f}".format(footprint))

    return pickups.groupby(pd.Grouper(key=COLUMN, freq=freq)).size()
def fetch_nyc_taxi_pickups(year, file, freq="1h"):
    """Get yearly pickup counts for a given time interval frequency and
    save them as csv.

    Calls ``get_monthly_values`` for months 1 through 12 of ``year``,
    combines the twelve results into a single ``Pickup_Count`` column and
    writes it out with ``DataFrame.to_csv``.

    Parameters
    ----------
    year : year forwarded to ``get_monthly_values``.
    file : destination handed to ``DataFrame.to_csv``.
    freq : bin width forwarded to ``get_monthly_values`` (default ``"1h"``).

    Returns
    -------
    None
        The result is written to ``file``.
    """
    monthly = [get_monthly_values(year, month, freq) for month in range(1, 13)]
    combined = pd.concat(monthly)

    # Concatenation keeps one row per (month, bin). If a monthly file holds
    # records stamped outside its own month, the same bin shows up in more
    # than one monthly series; sum those rows so every timestamp appears
    # exactly once in the output. With no overlaps this leaves counts as-is.
    combined = combined.groupby(level=0).sum()

    combined.to_frame("Pickup_Count").to_csv(file)